import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Spacing, in seconds, of the intermediate uniform time grid (20 ms).
_RESAMPLE_STEP_S = 0.02
# Rows written per output file: 161 samples, i.e. 160 equal intervals.
_N_OUTPUT_POINTS = 161

# Parameter layouts as (output column, input column) pairs. The first pair is
# always the period t_p; it is written between 'Time' and 'Voltage', the
# remaining pairs are written between 'Voltage' and 'Current'.
_PARAMS_5D = (('t_p', 't_p'), ('h', 'h'), ('C', 'C'), ('δ_value', 'δ_value'))
_PARAMS_7D = (
    ('t_p', 'tp'),
    ('h', 'h'),
    ('C', 'C'),
    ('δ_value', 'd'),
    ('mu', 'mu'),
    ('mux', 'mux'),
)
_PARAMS_9D = _PARAMS_7D + (('PL', 'PL'), ('PR', 'PR'))


def _select_parameter_layout(columns):
    """Return the parameter layout matching the column names of one input file."""
    if all(source in columns for _, source in _PARAMS_5D):
        return _PARAMS_5D
    # Any sign of PL/PR means the 9D layout; a file carrying only one of the
    # two is rejected later by the required-column check.
    if 'PL' in columns or 'PR' in columns:
        return _PARAMS_9D
    return _PARAMS_7D


def _require_columns(df, names, csv_file_path):
    """Raise KeyError naming the file and every column in *names* it lacks."""
    missing = [name for name in names if name not in df.columns]
    if missing:
        raise KeyError(f"{csv_file_path}: missing required column(s) {missing}")


def _resample_first_two_periods(time, voltage, current, t_p, csv_file_path):
    """Resample the samples with time <= 2 * t_p onto _N_OUTPUT_POINTS points.

    Returns the tuple (final_time, final_voltage, final_current).
    Raises ValueError when no sample of the 20 ms grid lies before the cutoff.
    """
    # np.interp does not verify that its x-coordinates are increasing and
    # returns meaningless values when they are not, so sort by time first.
    order = np.argsort(time, kind='stable')
    time, voltage, current = time[order], voltage[order], current[order]

    # Uniform 20 ms grid over the recorded span (end point excluded by arange),
    # truncated after the second period.
    grid = np.arange(time[0], time[-1], _RESAMPLE_STEP_S)
    grid = grid[grid <= 2 * t_p]
    if grid.size == 0:
        raise ValueError(
            f"{csv_file_path}: no samples on the {_RESAMPLE_STEP_S} s grid "
            f"before the cutoff time 2 * t_p = {2 * t_p}"
        )
    grid_voltage = np.interp(grid, time, voltage)
    grid_current = np.interp(grid, time, current)

    # Second pass: fixed number of evenly spaced points over the kept window.
    final_time = np.linspace(grid[0], grid[-1], _N_OUTPUT_POINTS)
    final_voltage = np.interp(final_time, grid, grid_voltage)
    final_current = np.interp(final_time, grid, grid_current)
    return final_time, final_voltage, final_current


def _process_file(csv_file_path):
    """Read one CSV file and return its processed DataFrame."""
    df = pd.read_csv(csv_file_path)

    # Detect the column naming scheme: 'Time'/'Voltage'/'Current' (5D dataset)
    # or 't'/'V'/'I' (7D/9D datasets).
    if 'Time' in df.columns:
        time_col, voltage_col, current_col = 'Time', 'Voltage', 'Current'
    else:
        time_col, voltage_col, current_col = 't', 'V', 'I'
    t_p_col = 't_p' if 't_p' in df.columns else 'tp'
    layout = _select_parameter_layout(df.columns)

    required = [time_col, voltage_col, current_col, t_p_col]
    required += [source for _, source in layout]
    _require_columns(df, required, csv_file_path)
    if df.empty:
        raise ValueError(f"{csv_file_path}: file contains no data rows")

    # Parameters are assumed constant over all rows, so the first row is used.
    t_p = df[t_p_col].iloc[0]
    final_time, final_voltage, final_current = _resample_first_two_periods(
        df[time_col].to_numpy(dtype=float),
        df[voltage_col].to_numpy(dtype=float),
        df[current_col].to_numpy(dtype=float),
        t_p,
        csv_file_path,
    )

    # Scalars are broadcast by pandas to the length of the array columns.
    period_name, period_source = layout[0]
    data = {'Time': final_time, period_name: df[period_source].iloc[0]}
    data['Voltage'] = final_voltage
    for output_name, source in layout[1:]:
        data[output_name] = df[source].iloc[0]
    data['Current'] = final_current
    return pd.DataFrame(data)


def main(folder_path='./csv_files', output_folder='./processed_data_5D/'):
    """Resample every CSV file in *folder_path* and save it to *output_folder*.

    For each file the time/voltage/current signals are interpolated onto a
    20 ms grid, cut at twice the period t_p, and interpolated again onto 161
    evenly spaced points (160 intervals). The constant parameters found in the
    file (t_p, h, C, delta and, for the 7D/9D datasets, mu, mux, PL, PR) are
    repeated on every output row. Each result is written as
    ``processed_<original name>`` in *output_folder*.

    Args:
        folder_path: Folder containing the input ``.csv`` files.
        output_folder: Destination folder, created if missing. The name can be
            adjusted according to the dataset (5D, 7D, 9D).

    Raises:
        FileNotFoundError: If *folder_path* does not exist.
        KeyError: If a file lacks a column required by its detected layout.
        ValueError: If a file has no data rows or no samples before 2 * t_p.
    """
    os.makedirs(output_folder, exist_ok=True)  # Ensure the output directory exists

    # Sorted so the processing order does not depend on the file system.
    csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))

    for csv_file in csv_files:
        processed_df = _process_file(os.path.join(folder_path, csv_file))

        output_csv_path = os.path.join(output_folder, f'processed_{csv_file}')
        processed_df.to_csv(output_csv_path, index=False)

        print(f"Processed data saved to: {output_csv_path}")
        

# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
        

